{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "from IPython.core import display\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import numpy as np\n",
    "\n",
    "#If you are running on a server, launch xvfb to record game videos\n",
    "#Please make sure you have xvfb installed\n",
    "import os\n",
    "if type(os.environ.get(\"DISPLAY\")) is not str or len(os.environ.get(\"DISPLAY\"))==0:\n",
    "    !bash ../xvfb start\n",
    "    %env DISPLAY=:1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Kung-Fu, recurrent style\n",
    "\n",
    "In this notebook we'll once again train RL agent for for atari [KungFuMaster](https://gym.openai.com/envs/KungFuMaster-v0/), this time using recurrent neural networks.\n",
    "\n",
    "![http://www.retroland.com/wp-content/uploads/2011/07/King-Fu-Master.jpg](http://www.retroland.com/wp-content/uploads/2011/07/King-Fu-Master.jpg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "from atari_util import PreprocessAtari\n",
    "\n",
    "def make_env():\n",
    "    env = gym.make(\"KungFuMasterDeterministic-v0\")\n",
    "    env = PreprocessAtari(env, height=42, width=42,\n",
    "                          crop = lambda img: img[60:-30, 15:],\n",
    "                          color=False, n_frames=1)\n",
    "    return env\n",
    "\n",
    "env = make_env()\n",
    "\n",
    "obs_shape = env.observation_space.shape\n",
    "n_actions = env.action_space.n\n",
    "\n",
    "print(\"Observation shape:\", obs_shape)\n",
    "print(\"Num actions:\", n_actions)\n",
    "print(\"Action names:\", env.env.env.get_action_meanings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "s = env.reset()\n",
    "for _ in range(100):\n",
    "    s, _, _, _ = env.step(env.action_space.sample())\n",
    "\n",
    "plt.title('Game image')\n",
    "plt.imshow(env.render('rgb_array'))\n",
    "plt.show()\n",
    "\n",
    "plt.title('Agent observation')\n",
    "plt.imshow(s.reshape([42,42]))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### POMDP setting\n",
    "\n",
    "The atari game we're working with is actually a POMDP: your agent needs to know timing at which enemies spawn and move, but cannot do so unless it has some memory. \n",
    "\n",
    "Let's design another agent that has a recurrent neural net memory to solve this. Here's a sketch.\n",
    "\n",
    "![img](img1.jpg)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "# a special module that converts [batch, channel, w, h] to [batch, units]\n",
    "class Flatten(nn.Module):\n",
    "    def forward(self, input):\n",
    "        return input.view(input.size(0), -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class SimpleRecurrentAgent(nn.Module):\n",
    "    def __init__(self, obs_shape, n_actions, reuse=False):\n",
    "        \"\"\"A simple actor-critic agent\"\"\"\n",
    "        super(self.__class__, self).__init__()\n",
    "        \n",
    "        self.conv0 = nn.Conv2d(1, 32, kernel_size=(3,3), stride=(2,2))\n",
    "        self.conv1 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=(2,2))\n",
    "        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=(2,2))\n",
    "        self.flatten = Flatten()\n",
    "\n",
    "        self.hid = nn.Linear(512, 128)\n",
    "        self.rnn = nn.LSTMCell(128, 128)\n",
    "\n",
    "        self.logits = nn.Linear(128, n_actions)\n",
    "        self.state_value = nn.Linear(128, 1)\n",
    "        \n",
    "        \n",
    "    def forward(self, prev_state, obs_t):\n",
    "        \"\"\"\n",
    "        Takes agent's previous step and observation, \n",
    "        returns next state and whatever it needs to learn (tf tensors)\n",
    "        \"\"\"\n",
    "        \n",
    "        # YOUR CODE: apply the whole neural net for one step here.\n",
    "        # See docs on self.rnn(...)\n",
    "        # the recurrent cell should take the last feedforward dense layer as input        \n",
    "        <YOUR CODE>\n",
    "        \n",
    "        new_state = <YOUR CODE>\n",
    "        logits = <YOUR CODE>\n",
    "        state_value = <YOUR CODE>\n",
    "        \n",
    "        return new_state, (logits, state_value)\n",
    "    \n",
    "    def get_initial_state(self, batch_size):\n",
    "        \"\"\"Return a list of agent memory states at game start. Each state is a np array of shape [batch_size, ...]\"\"\"\n",
    "        return torch.zeros((batch_size, 128)), torch.zeros((batch_size, 128))\n",
    "    \n",
    "    def sample_actions(self, agent_outputs):\n",
    "        \"\"\"pick actions given numeric agent outputs (np arrays)\"\"\"\n",
    "        logits, state_values = agent_outputs\n",
    "        probs = torch.softmax(logits, dim=-1)\n",
    "        return torch.multinomial(probs, 1)[:, 0].data.numpy()\n",
    "    \n",
    "    def step(self, prev_state, obs_t):\n",
    "        \"\"\" like forward, but obs_t is not torch.tensor \"\"\"\n",
    "        obs_t = torch.tensor(np.array(obs_t), dtype=torch.float32)\n",
    "        (h, c), (l, s) = self.forward(prev_state, obs_t)\n",
    "        return (h.detach(), c.detach()), (l.detach(), s.detach())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_parallel_games = 5\n",
    "gamma = 0.99\n",
    "\n",
    "agent = SimpleRecurrentAgent(obs_shape, n_actions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "state = [env.reset()]\n",
    "_, (logits, value) = agent.step(agent.get_initial_state(1), state)\n",
    "print(\"action logits:\\n\", logits)\n",
    "print(\"state values:\\n\", value)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's play!\n",
    "Let's build a function that measures agent's average reward."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def evaluate(agent, env, n_games=1):\n",
    "    \"\"\"Plays an entire game start to end, returns session rewards.\"\"\"\n",
    "\n",
    "    game_rewards = []\n",
    "    for _ in range(n_games):\n",
    "        # initial observation and memory\n",
    "        observation = env.reset()\n",
    "        prev_memories = agent.get_initial_state(1)\n",
    "\n",
    "        total_reward = 0\n",
    "        while True:\n",
    "            new_memories, readouts = agent.step(prev_memories, observation[None, ...])\n",
    "            action = agent.sample_actions(readouts)\n",
    "\n",
    "            observation, reward, done, info = env.step(action)\n",
    "\n",
    "            total_reward += reward\n",
    "            prev_memories = new_memories\n",
    "            if done: break\n",
    "                \n",
    "        game_rewards.append(total_reward)\n",
    "    return game_rewards\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "env_monitor = gym.wrappers.Monitor(env, directory=\"kungfu_videos\", force=True)\n",
    "rw = evaluate(agent, env_monitor, n_games=3,)\n",
    "env_monitor.close()\n",
    "print (rw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#show video\n",
    "from IPython.display import HTML\n",
    "import os\n",
    "\n",
    "video_names = list(filter(lambda s:s.endswith(\".mp4\"),os.listdir(\"./kungfu_videos/\")))\n",
    "\n",
    "HTML(\"\"\"\n",
    "<video width=\"640\" height=\"480\" controls>\n",
    "  <source src=\"{}\" type=\"video/mp4\">\n",
    "</video>\n",
    "\"\"\".format(\"./kungfu_videos/\"+video_names[-1])) #this may or may not be _last_ video. Try other indices\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training on parallel games\n",
    "\n",
    "We introduce a class called EnvPool - it's a tool that handles multiple environments for you. Here's how it works:\n",
    "![img](img2.jpg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from env_pool import EnvPool\n",
    "pool = EnvPool(agent, make_env, n_parallel_games)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We gonna train our agent on a thing called __rollouts:__\n",
    "![img](img3.jpg)\n",
    "\n",
    "A rollout is just a sequence of T observations, actions and rewards that agent took consequently.\n",
    "* First __s0__ is not necessarily initial state for the environment\n",
    "* Final state is not necessarily terminal\n",
    "* We sample several parallel rollouts for efficiency"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# for each of n_parallel_games, take 10 steps\n",
    "rollout_obs, rollout_actions, rollout_rewards, rollout_mask = pool.interact(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "print(\"Actions shape:\", rollout_actions.shape)\n",
    "print(\"Rewards shape:\", rollout_rewards.shape)\n",
    "print(\"Mask shape:\", rollout_mask.shape)\n",
    "print(\"Observations shape: \",rollout_obs.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Actor-critic objective\n",
    "\n",
    "Here we define a loss function that uses rollout above to train advantage actor-critic agent.\n",
    "\n",
    "\n",
    "Our loss consists of three components:\n",
    "\n",
    "* __The policy \"loss\"__\n",
    " $$ \\hat J = {1 \\over T} \\cdot \\sum_t { \\log \\pi(a_t | s_t) } \\cdot A_{const}(s,a) $$\n",
    "  * This function has no meaning in and of itself, but it was built such that\n",
    "  * $ \\nabla \\hat J = {1 \\over N} \\cdot \\sum_t { \\nabla \\log \\pi(a_t | s_t) } \\cdot A(s,a) \\approx \\nabla E_{s, a \\sim \\pi} R(s,a) $\n",
    "  * Therefore if we __maximize__ J_hat with gradient descent we will maximize expected reward\n",
    "  \n",
    "  \n",
    "* __The value \"loss\"__\n",
    "  $$ L_{td} = {1 \\over T} \\cdot \\sum_t { [r + \\gamma \\cdot V_{const}(s_{t+1}) - V(s_t)] ^ 2 }$$\n",
    "  * Ye Olde TD_loss from q-learning and alike\n",
    "  * If we minimize this loss, V(s) will converge to $V_\\pi(s) = E_{a \\sim \\pi(a | s)} R(s,a) $\n",
    "\n",
    "\n",
    "* __Entropy Regularizer__\n",
    "  $$ H = - {1 \\over T} \\sum_t \\sum_a {\\pi(a|s_t) \\cdot \\log \\pi (a|s_t)}$$\n",
    "  * If we __maximize__ entropy we discourage agent from predicting zero probability to actions\n",
    "  prematurely (a.k.a. exploration)\n",
    "  \n",
    "  \n",
    "So we optimize a linear combination of $L_{td}$ $- \\hat J$, $-H$\n",
    "  \n",
    "```\n",
    "\n",
    "```\n",
    "\n",
    "```\n",
    "\n",
    "```\n",
    "\n",
    "```\n",
    "\n",
    "```\n",
    "\n",
    "\n",
    "__One more thing:__ since we train on T-step rollouts, we can use N-step formula for advantage for free:\n",
    "  * At the last step, $A(s_t,a_t) = r(s_t, a_t) + \\gamma \\cdot V(s_{t+1}) - V(s) $\n",
    "  * One step earlier, $A(s_t,a_t) = r(s_t, a_t) + \\gamma \\cdot r(s_{t+1}, a_{t+1}) + \\gamma ^ 2 \\cdot V(s_{t+2}) - V(s) $\n",
    "  * Et cetera, et cetera. This way agent starts training much faster since it's estimate of A(s,a) depends less on his (imperfect) value function and more on actual rewards. There's also a [nice generalization](https://arxiv.org/abs/1506.02438) of this.\n",
    "\n",
    "\n",
    "__Note:__ it's also a good idea to scale rollout_len up to learn longer sequences. You may wish set it to >=20 or to start at 10 and then scale up as time passes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def to_one_hot(y, n_dims=None):\n",
    "    \"\"\" Take an integer vector (tensor of variable) and convert it to 1-hot matrix. \"\"\"\n",
    "    y_tensor = torch.tensor(y, dtype=torch.int64).view(-1, 1)\n",
    "    n_dims = n_dims if n_dims is not None else int(torch.max(y_tensor)) + 1\n",
    "    y_one_hot = torch.zeros(y_tensor.size()[0], n_dims).scatter_(1, y_tensor, 1)\n",
    "    return y_one_hot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "opt = torch.optim.Adam(agent.parameters(), lr=1e-5)\n",
    "\n",
    "def train_on_rollout(states, actions, rewards, is_not_done, prev_memory_states, gamma = 0.99):\n",
    "    \"\"\"\n",
    "    Takes a sequence of states, actions and rewards produced by generate_session.\n",
    "    Updates agent's weights by following the policy gradient above.\n",
    "    Please use Adam optimizer with default parameters.\n",
    "    \"\"\"\n",
    "    \n",
    "    # cast everything into a torch.tensor\n",
    "    states = torch.tensor(np.array(states), dtype=torch.float32)   # shape: [batch_size, time, c, h, w]\n",
    "    actions = torch.tensor(np.array(actions), dtype=torch.int64)    # shape: [batch_size, time]\n",
    "    rewards = torch.tensor(np.array(rewards), dtype=torch.float32) # shape: [batch_size, time]\n",
    "    is_not_done = torch.tensor(is_not_done.astype('float32'), dtype=torch.float32)  # shape: [batch_size, time]\n",
    "    rollout_length = rewards.shape[1] - 1\n",
    "\n",
    "    # predict logits, probas and log-probas using an agent. \n",
    "    memory = [m.detach() for m in prev_memory_states]\n",
    "    \n",
    "    logits = [] # append logit sequence here\n",
    "    state_values = [] #append state values here\n",
    "    for t in range(rewards.shape[1]):\n",
    "        obs_t = states[:, t]\n",
    "        \n",
    "        # use agent to comute logits_t and state values_t.\n",
    "        # append them to logits and state_values array\n",
    "        \n",
    "        memory, (logits_t, values_t) = <YOUR CODE>\n",
    "        \n",
    "        logits.append(logits_t)\n",
    "        state_values.append(values_t)\n",
    "        \n",
    "        \n",
    "    logits = torch.stack(logits, dim=1)\n",
    "    state_values = torch.stack(state_values, dim=1)\n",
    "    probas = torch.softmax(logits, dim=2)\n",
    "    logprobas = torch.log_softmax(logits, dim=2)\n",
    "        \n",
    "    # select log-probabilities for chosen actions, log pi(a_i|s_i)\n",
    "    actions_one_hot = to_one_hot(actions, n_actions).view(\n",
    "        actions.shape[0], actions.shape[1], n_actions)\n",
    "    logprobas_for_actions = torch.sum(logprobas * actions_one_hot, dim = -1)\n",
    "    \n",
    "    # Now let's compute two loss components:\n",
    "    # 1) Policy gradient objective. \n",
    "    # Notes: Please don't forget to call .detach() on advantage term. Also please use mean, not sum.\n",
    "    # it's okay to use loops if you want\n",
    "    J_hat = 0 # policy objective as in the formula for J_hat\n",
    "    \n",
    "    # 2) Temporal difference MSE for state values\n",
    "    # Notes: Please don't forget to call on V(s') term. Also please use mean, not sum.\n",
    "    # it's okay to use loops if you want\n",
    "    value_loss = 0\n",
    "#     print(state_values.shape)\n",
    "#     1/0\n",
    "    cumulative_returns = state_values[:, -1].detach()\n",
    "    #print(cumulative_returns)\n",
    "    for t in reversed(range(rollout_length)):\n",
    "        r_t = rewards[:, t]                                # current rewards\n",
    "        V_t = state_values[:, t]                           # current state values\n",
    "        V_next = state_values[:, t + 1].detach()           # next state values\n",
    "        logpi_a_s_t = logprobas_for_actions[:, t]          # log-probability of a_t in s_t\n",
    "        \n",
    "        # update G_t = r_t + gamma * G_{t+1} as we did in week6 reinforce\n",
    "        cumulative_returns = G_t = r_t + gamma * cumulative_returns\n",
    "        # Compute temporal difference error (MSE for V(s))\n",
    "        value_loss += <YOUR CODE>\n",
    "        \n",
    "        # compute advantage A(s_t, a_t) using cumulative returns and V(s_t) as baseline\n",
    "        advantage = <YOUR CODE>\n",
    "        advantage = advantage.detach()\n",
    "        \n",
    "        # compute policy pseudo-loss aka -J_hat.\n",
    "        J_hat += <YOUR CODE>\n",
    "    #print(J_hat)\n",
    "    #regularize with entropy\n",
    "    entropy_reg = <compute entropy regularizer>\n",
    "    \n",
    "    # add-up three loss components and average over time\n",
    "    loss = -J_hat / rollout_length +\\\n",
    "           value_loss / rollout_length +\\\n",
    "           -0.01 * entropy_reg\n",
    "    \n",
    "    #print(loss)\n",
    "    # Gradient descent step\n",
    "    < your code >\n",
    "    \n",
    "    return loss.data.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# let's test it\n",
    "memory = list(pool.prev_memory_states)\n",
    "rollout_obs, rollout_actions, rollout_rewards, rollout_mask = pool.interact(10)\n",
    "\n",
    "train_on_rollout(rollout_obs, rollout_actions, rollout_rewards, rollout_mask, memory)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train \n",
    "\n",
    "just run train step and see if agent learns any better"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.display import clear_output\n",
    "from tqdm import trange\n",
    "from pandas import DataFrame\n",
    "moving_average = lambda x, **kw: DataFrame({'x':np.asarray(x)}).x.ewm(**kw).mean().values\n",
    "\n",
    "rewards_history = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "for i in trange(15000):  \n",
    "    \n",
    "    memory = list(pool.prev_memory_states)\n",
    "    rollout_obs, rollout_actions, rollout_rewards, rollout_mask = pool.interact(10)\n",
    "    train_on_rollout(rollout_obs, rollout_actions, rollout_rewards, rollout_mask, memory)    \n",
    "    \n",
    "    if i % 100 == 0: \n",
    "        rewards_history.append(np.mean(evaluate(agent, env, n_games=1)))\n",
    "        clear_output(True)\n",
    "        plt.plot(rewards_history, label='rewards')\n",
    "        plt.plot(moving_average(np.array(rewards_history),span=10), label='rewards ewma@10')\n",
    "        plt.legend()\n",
    "        plt.show()\n",
    "        if rewards_history[-1] >= 10000:\n",
    "            print(\"Your agent has just passed the minimum homework threshold\")\n",
    "            break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Relax and grab some refreshments while your agent is locked in an infinite loop of violence and death.\n",
    "\n",
    "__How to interpret plots:__\n",
    "\n",
    "The session reward is the easy thing: it should in general go up over time, but it's okay if it fluctuates ~~like crazy~~. It's also OK if it reward doesn't increase substantially before some 10k initial steps. However, if reward reaches zero and doesn't seem to get up over 2-3 evaluations, there's something wrong happening.\n",
    "\n",
    "\n",
    "Since we use a policy-based method, we also keep track of __policy entropy__ - the same one you used as a regularizer. The only important thing about it is that your entropy shouldn't drop too low (`< 0.1`) before your agent gets the yellow belt. Or at least it can drop there, but _it shouldn't stay there for long_.\n",
    "\n",
    "If it does, the culprit is likely:\n",
    "* Some bug in entropy computation. Remember that it is $ - \\sum p(a_i) \\cdot log p(a_i) $\n",
    "* Your agent architecture converges too fast. Increase entropy coefficient in actor loss. \n",
    "* Gradient explosion - just [clip gradients](https://stackoverflow.com/a/43486487) and maybe use a smaller network\n",
    "* Us. Or TF developers. Or aliens. Or lizardfolk. Contact us on forums before it's too late!\n",
    "\n",
    "If you're debugging, just run `logits, values = agent.step(batch_states)` and manually look into logits and values. This will reveal the problem 9 times out of 10: you'll likely see some NaNs or insanely large numbers or zeros. Try to catch the moment when this happens for the first time and investigate from there."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### \"Final\" evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "env_monitor = gym.wrappers.Monitor(env, directory=\"kungfu_videos\", force=True)\n",
    "final_rewards = evaluate(agent, env_monitor, n_games=20,)\n",
    "env_monitor.close()\n",
    "print(\"Final mean reward\", np.mean(final_rewards))\n",
    "\n",
    "video_names = list(filter(lambda s:s.endswith(\".mp4\"),os.listdir(\"./kungfu_videos/\")))\n",
    "HTML(\"\"\"\n",
    "<video width=\"640\" height=\"480\" controls>\n",
    "  <source src=\"{}\" type=\"video/mp4\">\n",
    "</video>\n",
    "\"\"\".format(\"./kungfu_videos/\"+video_names[-1])) #this may or may not be _last_ video. Try other indices\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
